# Task: write a function `kern_density` and visually test how it performs on some hand-constructed datasets and bandwidths.
#' Epanechnikov kernel
#'
#' Evaluates K(u) = (3/4) * (1 - u^2) for |u| <= 1 and 0 otherwise, at the
#' scaled points u = x / h. The result is NOT divided by h; callers that need
#' a density-scaled kernel must apply the 1 / h factor themselves.
#'
#' @param x Numeric vector of points at which to evaluate the kernel.
#' @param h Bandwidth used to scale `x`; defaults to 1.
#' @return Numeric vector of kernel values, one per element of `x / h`.
epan_kernel <- function(x, h = 1) {
  u <- x / h
  # Clamp at zero instead of multiplying by a 0/1 indicator: outside the
  # support 1 - u^2 is negative (or -Inf when u is infinite or u^2 overflows),
  # and -Inf * 0 would yield NaN rather than the correct kernel value 0.
  pmax((3 / 4) * (1 - u^2), 0)
}
# Script-level bandwidth value. Note that kern_density() takes `h` as an
# explicit argument and does not read this global.
h <- 1

#' Kernel density estimate using the Epanechnikov kernel
#'
#' For every evaluation point k in `x_new`, computes
#' mean(K((k - x) / h)) / h, where K is evaluated by `epan_kernel()`.
#'
#' @param x Numeric vector of training observations.
#' @param h Bandwidth: a single finite, strictly positive number.
#' @param x_new Numeric vector of points at which to evaluate the estimate.
#' @return Numeric vector of density estimates, one per element of `x_new`
#'   (`numeric(0)` when `x_new` is empty).
kern_density <- function(x, h, x_new) {
  # A zero, negative, missing or vector-valued bandwidth would silently give
  # NaN, negative or matrix-shaped "densities", so reject it up front.
  stopifnot(is.numeric(h), length(h) == 1L, is.finite(h), h > 0)

  # vapply() (unlike sapply()) always returns a numeric vector, so an empty
  # `x_new` gives numeric(0) rather than an empty list.
  vapply(
    x_new,
    function(k) mean(epan_kernel(k - x, h)) / h,
    numeric(1)
  )
}
# Bandwidths to compare.
h <- c(0.01, 0.1, 0.5, 1, 2)

# Simulated data: 2000 standard-normal training draws and 100 standard-normal
# evaluation points, sorted so the line plots are drawn left to right.
set.seed(666)
x <- rnorm(2000, mean = 0, sd = 1)
x_new <- sort(rnorm(100, mean = 0, sd = 1))

# Draw one line plot of the density estimate per bandwidth.
for (bw in h) {
  plot(
    x = x_new,
    y = kern_density(x, bw, x_new),
    type = "l",
    col = "orange",
    main = paste("Kernel Density Estimates for bandwidth =", bw),
    xlab = "x",
    ylab = "Kernel Density"
  )
}
# From the plots we can see that as the bandwidth increases, the kernel density estimate becomes smoother.